#!/usr/bin/env python
# coding: utf-8

# In[1]:


exec(open('init_path.py').read())
exec((P_Lib/'GasStation.py').read_text())
get_ipython().run_line_magic('matplotlib', 'inline')


# In[2]:


h_min = 7; h_max = 21
# ls_fuel = ['diesel','e5','e10']
fuel = 'e5'


# Clustering Rival
f = P_GS_Data / 'Market' / 'all' / '20_80.csv'
df = read_csv(f)
df = df.groupby('MktID').StID.apply(list)
ls_rivals = df[df.str.len()==2].tolist()
GSRival = DataFrame(ls_rivals, columns = ['O','D'])
GSRival = GSRival.append(GSRival.rename(columns={'O':'D','D':'O'}))
dict_stid_to_rival = GSRival.set_index('O').D.to_dict()



rival_price_response_duration_threshold = 5 # in minutes


# In[36]:


fuel = 'e5'
def count_pc_response_types(f):
    df = read_hdf(f, 'GS')
    # df = df.loc[df.StID.isin([9,208,13,4426]), ['StID','Time',fuel]].copy()
    ls_stids = df.StID.unique().tolist()
    ls_stids = GSRival[GSRival.O.isin(ls_stids) & GSRival.D.isin(ls_stids)].O.tolist()
    df = df.loc[df.StID.isin(ls_stids), ['StID','Time',fuel]].copy()
    df = df[df[fuel]!=-1].copy()
    df['PC'] = df[fuel] - df.groupby('StID')[fuel].shift(1)
    df = df[(df.PC!=0) & df.PC.notnull()].copy()
    df['Sign'] = np.sign(df.PC).astype(int)
    RivalPCSignCount = df.groupby('StID').Sign.value_counts().unstack(1).fillna(0).astype(int).reset_index()
    RivalPCSignCount.columns = ['Rival','nInc','nDec']
    df['H'] = df.Time.dt.hour; df = df[(df.H>=h_min) & (df.H<=h_max)]
    Me = df[['StID','Time','Sign']].rename(columns={'StID':'Me','Time':'ResponseTime','Sign':'SignMe'})
    Me['Rival'] = Me.Me.map(dict_stid_to_rival)
    Rival = df[['StID','Time','Sign']].rename(columns={'StID':'Rival','Time':'RivalTime','Sign':'SignRival'})
    df = merge(Me, Rival) # merge Me & Rival by 'Rival' column
    df = df[df.ResponseTime>=df.RivalTime].copy() # Only ResponseTime>=RivalTime
    df['Rival'] = df.Rival.astype(int)
    df = df[(df.ResponseTime - df.RivalTime).dt.seconds / 60 < rival_price_response_duration_threshold].copy()
    df = df.sort_values(by=['Rival','RivalTime','ResponseTime']).drop_duplicates(['Me','ResponseTime'], keep='last')
    df['RivalMeResponseSign'] = df.SignRival.astype(str) + df.SignMe.astype(str)
    RivalMeResponseSign = df.groupby(['Me','RivalMeResponseSign']).Rival.count().unstack(1).fillna(0)
    RivalMeResponseSign = RivalMeResponseSign.reindex(ls_stids).fillna(0).reset_index()
    RivalMeResponseSign.columns.name = None
    RivalMeResponseSign['Rival'] = RivalMeResponseSign.Me.map(dict_stid_to_rival)
    df = merge(RivalMeResponseSign, RivalPCSignCount, how='left').fillna(0).astype(int)
    df['YMD'] = int(f.name.split('/')[-1].split('.')[0])
    return df



files = sorted(list((P_GS_Data_Raw / 'PH_Day').glob('*.h5')))
print(len(files))
ls_strYMD_weekday_nonholiday = load_obj(P_GS_Data / 'GS' / 'ls_strYMD_weekday_nonholiday.pkl')
files = [f for f in files if f.stem in ls_strYMD_weekday_nonholiday]
print(len(files))


# In[39]:


# In[40]:


ls = []
for f in files:
    ls.append(count_pc_response_types(f))


# In[41]:


df = concat(ls)
df.head()


# In[42]:


# In[43]:


df.columns = ['StID','DD','DU','UD','UU','Rival','URival','DRival','YMD']
df = df.fillna(0).astype(int)
df.head(2)


# In[44]:


rival_type = 'cluster_rival_all_20_80'


# In[45]:

fname = 'price_change_'+'wi'+str(rival_price_response_duration_threshold)+'m'+'_response_type_count-'+rival_type+'-'+fuel
print(fname)
f = P_GS_Data / 'PCResponse' / (fname + '.dta')
df.to_stata(f, write_index=False)
